Submitted by:
ID1: 320983216 Name1: Maxim Kolchinsky
ID2: 321340580 Name2: Anna Romanov
The data for this lesson comes from:
> Saigi et al. “MET-Oncogenic and JAK2-Inactivating Alterations Are Independent Factors That Affect Regulation of PD-L1 Expression in Lung Cancer” PLoS ONE. 2018 Jun 13;9(6):e99625. PMID: 24926665.
Purpose: The blockade of immune checkpoints such as PDL1 and PD-1 is being exploited therapeutically in several types of malignancies. Here, we aimed to understand the contribution of the genetics of lung cancer to the ability of tumor cells to escape immunosurveillance checkpoints. Experimental Design: More than 150 primary non-small cell lung cancers, including pulmonary sarcomatoid carcinomas, were tested for levels of the HLA-I complex, PD-L1, tumor-infiltrating CD8þ lymphocytes, and alterations in main lung cancer genes. Correlations were validated in cancer cell lines using appropriate treatments to activate or inhibit selected pathways. We also performed RNA sequencing to assess changes in gene expression after these treatments. Results: MET-oncogenic activation tended to associate with positive PD-L1 immunostaining, whereas STK11 mutations were correlated with negative immunostaining. In MET-altered cancer cells, MET triggered a transcriptional increase of PD-L1 that was independent of the IFNgmediated JAK/STAT pathway. The activation of MET also upregulated other immunosuppressive genes (PDCD1LG2 and SOCS1) and transcripts involved in angiogenesis (VEGFA and NRP1) and in cell proliferation. We also report recurrent inactivating mutations in JAK2 that co-occur with alterations in MET and STK11, which prevented the induction of immunoresponse-related genes following treatment with IFNg. Conclusions: We show that MET activation promotes the expression of several negative checkpoint regulators of the immunoresponse, including PD-L1. In addition, we report inactivation of JAK2 in lung cancer cells that prevented the response to IFNg. These alterations are likely to facilitate tumor growth by enabling immune tolerance and may affect the response to immune checkpoint inhibitors
This data was downloaded from GEO (GSE:GSE109720)
library(readr)
library(dplyr)
library(ggplot2)
rawcounts <- read_csv("data/lung_counts.csv")
metadata <- read_csv("data/lung_metadata.csv")
rawcounts
metadata
library(tibble)
library(tidyr)
# a plotting function
plot_heatmap <- function(km) {
centers <- km$centers %>%
tbl_df() %>%
rownames_to_column('Cluster') %>%
gather(Sample, value, -Cluster) %>%
mutate(
Cluster = factor(Cluster),
Sample = factor(Sample)
)
ggplot(centers,aes(Sample,Cluster)) + geom_tile(aes(fill=value)) + geom_text(aes(label = round(value, 1)), angle=90, size=4) + theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=0, size=12))
}
# cluster the genes
rawcounts_RN<- data.frame(rawcounts, row.names=1)
km <- kmeans(rawcounts_RN,5)
km$centers
## AE1148 AE1149 AE1150 AE1151 AE1152 AE1153
## 1 6589.0833 8841.2332 6127.4839 7358.684 7346.2171 8890.0477
## 2 31052.4260 42212.6686 27571.1893 34696.260 32887.3373 41325.6095
## 3 101059.4762 147734.7143 89713.5238 117618.429 120754.0000 151602.5238
## 4 261075.5000 355397.5000 249188.5000 298081.500 312029.0000 330232.5000
## 5 204.3794 260.8659 184.8346 225.175 226.5053 263.4797
## AE1154 AE1155 AE1156 AE1157 AE1158 AE1159
## 1 5868.3233 7272.5940 6356.2941 8001.1913 6352.8116 7040.299
## 2 26505.6568 33457.2663 29596.4379 37330.6331 30042.5799 32418.935
## 3 90998.6190 124462.2857 104368.4286 134149.9048 106395.0952 117189.476
## 4 222399.5000 302143.5000 258949.5000 295158.0000 257717.0000 284918.500
## 5 177.6818 222.9686 196.5011 232.7849 189.8281 212.521
## AE1160 AE1161 AE1162 AE1163 AE1164 AE1165
## 1 7666.075 5913.3574 7290.1723 6847.2006 6203.9289 6185.2936
## 2 35868.917 27201.2367 34315.4142 31814.4260 29037.9941 29120.4734
## 3 143324.762 104374.0952 132841.0000 122184.8571 107568.4286 112637.7143
## 4 413308.500 281465.5000 419312.0000 335008.5000 264320.5000 344465.0000
## 5 242.584 180.0566 253.0504 219.7236 191.4948 214.1561
## AE1166 AE1167 AE1168 AE1169 AE1170 AE1171
## 1 7557.6928 6488.0501 6934.8676 7469.2434 5595.4722 6493.8734
## 2 34757.7219 29575.0533 32625.8402 36791.6982 27468.7456 31881.7219
## 3 135000.0476 111633.0476 126291.2857 169666.4762 124785.2381 141406.8095
## 4 384117.5000 304171.0000 421639.5000 605629.0000 430845.0000 507135.5000
## 5 241.4646 198.3505 238.9257 253.7209 189.6234 218.4082
## AE1172 AE1173 AE1174 AE1175 AE1176 AE1177
## 1 5550.7936 6760.0141 6311.3476 6946.255 5771.8062 6654.9367
## 2 27203.1006 33370.0118 31380.9112 29166.260 24118.6923 28170.8698
## 3 117254.7619 146959.6667 137927.1429 127871.333 104154.3333 123308.0952
## 4 422003.5000 530275.0000 492796.5000 382136.500 308577.5000 364062.5000
## 5 185.7504 226.5967 210.5955 222.858 183.9239 211.8739
## AE1178 AE1179 AE1180
## 1 5181.0136 5586.3374 7055.9640
## 2 21687.6036 23821.4024 29880.8935
## 3 94938.3333 104582.6190 133835.4286
## 4 280669.0000 308508.0000 400801.0000
## 5 165.6252 178.0214 225.1644
num_genes <- table(km$cluster)
num_genes
##
## 1 2 3 4 5
## 2054 169 21 2 56101
plot(num_genes, type="l", main='num of genes in each cluster')
plot_heatmap(km)
# remove rows of zeroes
rawcounts_RN$sum <- rowSums(rawcounts_RN)
rawcounts_clean <- rawcounts_RN[ rawcounts_RN$sum >= 10, ]
rawcounts_clean
rawcounts_clean$sum <- NULL
# apply log scaling
rawcounts_RN
rawcounts_log = log10(1+rawcounts_clean)
km <- kmeans(rawcounts_log,5)
km$centers
## AE1148 AE1149 AE1150 AE1151 AE1152 AE1153 AE1154
## 1 0.2224750 0.2595178 0.2365299 0.2465905 0.2664494 0.2856338 0.2270980
## 2 0.9926849 1.0658002 0.9937652 1.0406866 1.0859241 1.1021633 0.9782394
## 3 2.0053516 2.0718609 1.9683162 2.0457680 2.0753388 2.0995963 1.9408546
## 4 2.8768341 2.9629047 2.8159122 2.9162445 2.9151470 2.9548990 2.7748572
## 5 3.5738212 3.6867595 3.5233650 3.6183988 3.6159370 3.6748730 3.4829931
## AE1155 AE1156 AE1157 AE1158 AE1159 AE1160 AE1161
## 1 0.2611225 0.2488747 0.2554619 0.2473489 0.2562278 0.2735666 0.2295744
## 2 1.0667283 1.0603057 1.0567061 1.0293423 1.0579908 1.0154241 0.8954655
## 3 2.0608478 2.0370740 2.0484974 1.9979362 2.0467039 1.8938259 1.7459993
## 4 2.9035514 2.8555548 2.9088671 2.8171388 2.8838187 2.9247148 2.7762690
## 5 3.6039529 3.5517147 3.6395846 3.5207057 3.5946933 3.6473609 3.5152520
## AE1162 AE1163 AE1164 AE1165 AE1166 AE1167 AE1168
## 1 0.3069437 0.2660096 0.2308454 0.2634969 0.2683428 0.2351569 0.2865346
## 2 1.0746557 0.9825569 0.9117918 0.9844687 1.0037558 0.9144569 1.0278350
## 3 1.9523912 1.8613406 1.7745325 1.8706251 1.8907524 1.7858088 1.9209397
## 4 2.9481320 2.8845318 2.8080246 2.8748433 2.9221639 2.8221034 2.9211485
## 5 3.6335486 3.5997041 3.5385613 3.5622239 3.6428058 3.5588146 3.6114502
## AE1169 AE1170 AE1171 AE1172 AE1173 AE1174 AE1175
## 1 0.4410393 0.373576 0.4068394 0.3665689 0.409207 0.3943318 0.2684219
## 2 1.1728400 1.052466 1.1166752 1.0400485 1.123027 1.1024374 0.9364739
## 3 2.0208283 1.892123 1.9587703 1.8819145 1.968288 1.9379473 1.8364030
## 4 2.9333166 2.806446 2.8682297 2.7963220 2.882997 2.8517416 2.8476059
## 5 3.6191436 3.491511 3.5584802 3.4891624 3.575307 3.5434462 3.5860752
## AE1176 AE1177 AE1178 AE1179 AE1180
## 1 0.2289570 0.2646176 0.2237919 0.2291777 0.2697421
## 2 0.8611594 0.9207595 0.8351714 0.8435144 0.9420726
## 3 1.7454409 1.8129884 1.7102739 1.7326465 1.8332838
## 4 2.7616280 2.8245047 2.7176708 2.7502298 2.8514261
## 5 3.5038501 3.5662006 3.4583779 3.4908875 3.5926027
num_genes <- table(km$cluster)
num_genes
##
## 1 2 3 4 5
## 9311 4629 3569 5789 4401
plot(num_genes, type="l", main='num of genes in each cluster')
plot_heatmap(km)
# Apply standardization
rawcounts_scaled <- scale(rawcounts_log)
km <- kmeans(rawcounts_scaled,5)
km$centers
## AE1148 AE1149 AE1150 AE1151 AE1152 AE1153
## 1 0.2528079 0.2451968 0.2458377 0.2538863 0.2649167 0.2567951
## 2 0.8964867 0.8893151 0.8828349 0.8935212 0.8863283 0.8799729
## 3 1.4114811 1.4119526 1.4156985 1.4092971 1.4048429 1.4039390
## 4 -1.0693269 -1.0688768 -1.0633402 -1.0722110 -1.0782449 -1.0693484
## 5 -0.5008578 -0.4873795 -0.4943596 -0.4901676 -0.4734303 -0.4761762
## AE1154 AE1155 AE1156 AE1157 AE1158 AE1159
## 1 0.2447623 0.2625378 0.2683054 0.2498533 0.2583316 0.2608463
## 2 0.8760129 0.8860388 0.8821716 0.8804559 0.8744648 0.8826296
## 3 1.4127032 1.4041744 1.4048258 1.4157064 1.4051733 1.4108421
## 4 -1.0586070 -1.0739779 -1.0785432 -1.0690393 -1.0662110 -1.0741021
## 5 -0.4916615 -0.4791412 -0.4702332 -0.4830918 -0.4779076 -0.4795858
## AE1160 AE1161 AE1162 AE1163 AE1164 AE1165
## 1 0.1468150 0.1198996 0.1673943 0.1452124 0.1274776 0.1580055
## 2 0.8973837 0.8894981 0.8979515 0.8978315 0.8936499 0.9007449
## 3 1.4240540 1.4423877 1.4017358 1.4245303 1.4359796 1.4101721
## 4 -1.0365985 -1.0180156 -1.0427133 -1.0321912 -1.0220277 -1.0340571
## 5 -0.4976051 -0.5213826 -0.4809970 -0.5061895 -0.5183713 -0.5024793
## AE1166 AE1167 AE1168 AE1169 AE1170 AE1171
## 1 0.1481989 0.1284032 0.1652151 0.1754403 0.1611897 0.1696153
## 2 0.8979212 0.8933030 0.8994738 0.8715029 0.8722969 0.8709739
## 3 1.4222051 1.4381401 1.4073590 1.3956382 1.4062621 1.4039338
## 4 -1.0346146 -1.0215992 -1.0380963 -1.0333268 -1.0232746 -1.0306172
## 5 -0.5015744 -0.5215339 -0.4957272 -0.4671921 -0.4872764 -0.4752458
## AE1172 AE1173 AE1174 AE1175 AE1176 AE1177
## 1 0.1597162 0.1691050 0.1655199 0.1384030 0.1238105 0.1326525
## 2 0.8720849 0.8718143 0.8711739 0.8850181 0.8846325 0.8835753
## 3 1.4123471 1.4040635 1.4059962 1.4240011 1.4340972 1.4275570
## 4 -1.0237152 -1.0311316 -1.0302534 -1.0126447 -1.0052797 -1.0095451
## 5 -0.4907044 -0.4749960 -0.4750045 -0.5235211 -0.5360008 -0.5268240
## AE1178 AE1179 AE1180
## 1 0.1194956 0.1214050 0.1338500
## 2 0.8831780 0.8858309 0.8852047
## 3 1.4386298 1.4360721 1.4257434
## 4 -1.0017273 -1.0019488 -1.0131401
## 5 -0.5422155 -0.5441582 -0.5208928
num_genes <- table(km$cluster)
num_genes
##
## 1 2 3 4 5
## 3580 5810 4362 9297 4650
plot(num_genes, type="l", main='num of genes in each cluster')
plot_heatmap(km)
# load DE gene list
DE_res <- read_csv("data/sigresults.csv")
DE_genes <- DE_res$row
# example of filtering a data frame
# good_raw_names <- row.names(rawcounts_RN[runif(5,0,nrow(rawcounts_RN)),])
# good_raw_names
dat_filtered = rawcounts_log[DE_genes,]
rawcounts_scaled <- scale(dat_filtered)
km <- kmeans(rawcounts_scaled,5)
km$centers
## AE1148 AE1149 AE1150 AE1151 AE1152 AE1153
## 1 -0.5679043 -0.5710244 -0.5646092 -0.5635655 -0.5572134 -0.5538821
## 2 0.1305102 0.1190975 0.1131888 0.1278225 0.1246943 0.1182771
## 3 0.6921359 0.6883703 0.6834653 0.6900177 0.6837953 0.6769214
## 4 -1.4186601 -1.4042857 -1.4001178 -1.4174385 -1.4131434 -1.3985486
## 5 1.2642194 1.2681902 1.2715872 1.2648753 1.2672687 1.2631622
## AE1154 AE1155 AE1156 AE1157 AE1158 AE1159
## 1 -0.5600944 -0.5577884 -0.5500426 -0.5612359 -0.5515806 -0.5594106
## 2 0.1109551 0.1260730 0.1230321 0.1107905 0.1136236 0.1195345
## 3 0.6738198 0.6808053 0.6788820 0.6817834 0.6711872 0.6821012
## 4 -1.3866043 -1.4083430 -1.4123522 -1.4019121 -1.3912170 -1.4097971
## 5 1.2679550 1.2649363 1.2695092 1.2766971 1.2667223 1.2747334
## AE1160 AE1161 AE1162 AE1163 AE1164 AE1165
## 1 -0.6829497 -0.69230036 -0.6637865 -0.6804427 -0.68754767 -0.6694058
## 2 0.1080044 0.08733502 0.1234108 0.1078759 0.09057776 0.1204023
## 3 0.7025155 0.70183119 0.6956437 0.7017723 0.70309774 0.6989594
## 4 -1.3155995 -1.29956885 -1.3176688 -1.3158176 -1.30384606 -1.3179860
## 5 1.2526127 1.26701374 1.2272945 1.2516227 1.26120947 1.2316046
## AE1166 AE1167 AE1168 AE1169 AE1170 AE1171
## 1 -0.6764159 -0.68630044 -0.6615795 -0.72306084 -0.73085504 -0.73275212
## 2 0.1069523 0.09250882 0.1207823 0.06216534 0.06053769 0.05927586
## 3 0.7034191 0.70400625 0.6985243 0.68600817 0.68696520 0.68710566
## 4 -1.3177964 -1.30796821 -1.3229750 -1.23180963 -1.22748292 -1.23049206
## 5 1.2477220 1.26169383 1.2302591 1.26464953 1.26749226 1.27559219
## AE1172 AE1173 AE1174 AE1175 AE1176 AE1177
## 1 -0.73003646 -0.73077177 -0.73333318 -0.69473459 -0.70456188 -0.69613378
## 2 0.05774518 0.05864332 0.05696841 0.08755498 0.08122156 0.08295579
## 3 0.68837087 0.68793246 0.68714281 0.69946044 0.70135830 0.69999229
## 4 -1.23359967 -1.23164832 -1.22851461 -1.27756735 -1.27180358 -1.27539189
## 5 1.27647625 1.27414198 1.27614209 1.24116187 1.24831702 1.24429253
## AE1178 AE1179 AE1180
## 1 -0.70449348 -0.69822741 -0.69284635
## 2 0.07953479 0.08273517 0.08558701
## 3 0.70144498 0.70187376 0.70028015
## 4 -1.27263484 -1.27849094 -1.27944355
## 5 1.25145116 1.24796525 1.24261055
num_genes <- table(km$cluster)
num_genes
##
## 1 2 3 4 5
## 3055 3267 5158 3895 2582
plot(num_genes, type="l", main='num of genes in each cluster')
plot_heatmap(km)
# a plotting function
plot_heatmap <- function(km) {
centers <- km$centers %>%
tbl_df() %>%
rownames_to_column('Cluster') %>%
gather(Gene, value, -Cluster) %>%
mutate(
Cluster = factor(Cluster),
Gene = factor(Gene)
)
ggplot(centers,aes(Cluster,Gene)) + geom_tile(aes(fill=value)) + theme(axis.text.x=element_text(angle=0, vjust=0.5, hjust=0, size=12)) + theme(axis.text.y=element_blank())
}
# cluster the samples
rawcounts_clean <- rawcounts_RN[ rawcounts_RN$sum >= 10, ]
rawcounts_log = log10(1+rawcounts_clean)
DE_res <- read_csv("data/sigresults.csv")
DE_genes <- DE_res$row
dat_filtered = rawcounts_log[DE_genes,]
km <- kmeans(t(dat_filtered),4)
#km$centers
table(km$cluster)
##
## 1 2 3 4
## 6 10 12 6
plot_heatmap(km)
sessionInfo()The sessionInfo() prints version information about R and any attached packages. It’s a good practice to always run this command at the end of your R session and record it for the sake of reproducibility in the future.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 tidyr_0.8.2 tibble_1.4.2 dplyr_0.7.8
## [5] readr_1.3.0 ggplot2_3.1.0 knitr_1.21
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.0 pillar_1.3.1 compiler_3.5.1 plyr_1.8.4
## [5] bindr_0.1.1 tools_3.5.1 digest_0.6.18 evaluate_0.12
## [9] gtable_0.2.0 pkgconfig_2.0.2 rlang_0.3.0.1 yaml_2.2.0
## [13] xfun_0.4 withr_2.1.2 stringr_1.3.1 hms_0.4.2
## [17] grid_3.5.1 tidyselect_0.2.5 glue_1.3.0 R6_2.3.0
## [21] rmarkdown_1.11 purrr_0.2.5 magrittr_1.5 codetools_0.2-15
## [25] scales_1.0.0 htmltools_0.3.6 assertthat_0.2.0 colorspace_1.3-2
## [29] labeling_0.3 stringi_1.2.4 lazyeval_0.2.1 munsell_0.5.0
## [33] crayon_1.3.4